{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 41. 自然语言处理"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 41.1.常用包"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 41.2.包导入及设置"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pynlpir as pynlpir"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np   \n",
    "import pandas as pd  \n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns; sns.set()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "% matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from matplotlib.font_manager import FontProperties\n",
    "font = FontProperties(fname=r'c:\\windows\\fonts\\simhei.ttf', size=15) "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 41.3.数据读入"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'主持人：中国中央电视台！主持人：中国中央电视台！主持人：此刻我们在北京中央电视台一号演播大厅向全球现场直播《2017年春节联欢晚会》。主持人：春回大地百花艳，节至人间万象新，一年一度的春节联欢晚会又一次如约而至。主持人：连续举办34年的央视春晚，已经成为伴随13亿中华儿女和全球华人辞旧迎新的新年俗。'"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "text=open('CCTVSpringFestvialGala/2017.txt', 'r').read().replace('\\n','')\n",
    "text[:150]    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 41.4.分词处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "pynlpir.open()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "part of speech not recognized: 'gtqg'\n",
      "part of speech not recognized: 'gtqg'\n",
      "part of speech not recognized: 'gtqg'\n",
      "part of speech not recognized: 'gtqg'\n",
      "part of speech not recognized: 'grqg'\n",
      "part of speech not recognized: 'grqg'\n",
      "part of speech not recognized: 'gwz'\n",
      "part of speech not recognized: 'gwz'\n",
      "part of speech not recognized: 'gwz'\n",
      "part of speech not recognized: 'gtqg'\n",
      "part of speech not recognized: 'gtqg'\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[('主持人', '名词'),\n",
       " ('：', '标点符号'),\n",
       " ('中国', '名词'),\n",
       " ('中央电视台', None),\n",
       " ('！', '标点符号'),\n",
       " ('主持人', '名词'),\n",
       " ('：', '标点符号'),\n",
       " ('中国', '名词'),\n",
       " ('中央电视台', None),\n",
       " ('！', '标点符号'),\n",
       " ('主持人', '名词'),\n",
       " ('：', '标点符号'),\n",
       " ('此刻', '代词'),\n",
       " ('我们', '代词'),\n",
       " ('在', '介词'),\n",
       " ('北京', '名词'),\n",
       " ('中央电视台', None),\n",
       " ('一', '数词'),\n",
       " ('号', '量词'),\n",
       " ('演播', '动词')]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pynlpir.segment(text,pos_names='parent',pos_english=False)[:20]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 41.5.自定义词汇"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pynlpir.nlpir.AddUserWord('央视'.encode('utf8'),'noun')\n",
    "pynlpir.nlpir.AddUserWord('主持人：'.encode('utf8'),'noun')\n",
    "pynlpir.nlpir.AddUserWord('观众朋友们'.encode('utf8'),'noun')\n",
    "pynlpir.nlpir.AddUserWord('春联'.encode('utf8'),'noun')\n",
    "pynlpir.nlpir.AddUserWord('一号演播大厅'.encode('utf8'),'noun')\n",
    "pynlpir.nlpir.AddUserWord('综合频道'.encode('utf8'),'noun')\n",
    "pynlpir.nlpir.AddUserWord('综艺频道'.encode('utf8'),'noun')\n",
    "pynlpir.nlpir.AddUserWord('中文国际频道'.encode('utf8'),'noun')\n",
    "pynlpir.nlpir.AddUserWord('军事农业频道'.encode('utf8'),'noun')\n",
    "pynlpir.nlpir.AddUserWord('少儿频道'.encode('utf8'),'noun')\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "part of speech not recognized: 'gtqg'\n",
      "part of speech not recognized: 'gtqg'\n",
      "part of speech not recognized: 'gtqg'\n",
      "part of speech not recognized: 'gtqg'\n",
      "part of speech not recognized: 'grqg'\n",
      "part of speech not recognized: 'grqg'\n",
      "part of speech not recognized: 'gwz'\n",
      "part of speech not recognized: 'gwz'\n",
      "part of speech not recognized: 'gwz'\n",
      "part of speech not recognized: 'gtqg'\n",
      "part of speech not recognized: 'gtqg'\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[('主持人：', '名词'),\n",
       " ('中国', '名词'),\n",
       " ('中央电视台', None),\n",
       " ('！', '标点符号'),\n",
       " ('主持人：', '名词'),\n",
       " ('中国', '名词'),\n",
       " ('中央电视台', None),\n",
       " ('！', '标点符号'),\n",
       " ('主持人：', '名词'),\n",
       " ('此刻', '代词'),\n",
       " ('我们', '代词'),\n",
       " ('在', '介词'),\n",
       " ('北京', '名词'),\n",
       " ('中央电视台', None),\n",
       " ('一号演播大厅', '名词'),\n",
       " ('向', '介词'),\n",
       " ('全球', '名词'),\n",
       " ('现场', '处所词'),\n",
       " ('直播', '动词'),\n",
       " ('《', '标点符号')]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pynlpir.segment(text,pos_names='parent',pos_english=False)[:20]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "part of speech not recognized: 'gtqg'\n",
      "part of speech not recognized: 'gtqg'\n",
      "part of speech not recognized: 'gtqg'\n",
      "part of speech not recognized: 'gtqg'\n",
      "part of speech not recognized: 'grqg'\n",
      "part of speech not recognized: 'grqg'\n",
      "part of speech not recognized: 'gwz'\n",
      "part of speech not recognized: 'gwz'\n",
      "part of speech not recognized: 'gwz'\n",
      "part of speech not recognized: 'gtqg'\n",
      "part of speech not recognized: 'gtqg'\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[['中央电视台', None, 2017],\n",
       " ['！', '标点符号', 2017],\n",
       " ['主持人：', '名词', 2017],\n",
       " ['中国', '名词', 2017],\n",
       " ['中央电视台', None, 2017],\n",
       " ['！', '标点符号', 2017],\n",
       " ['主持人：', '名词', 2017],\n",
       " ['此刻', '代词', 2017],\n",
       " ['我们', '代词', 2017],\n",
       " ['在', '介词', 2017],\n",
       " ['北京', '名词', 2017]]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "words = []\n",
    "year=2017\n",
    "year_words = []\n",
    "year_words.extend(pynlpir.segment(text,pos_names='parent',pos_english=False))\n",
    "  \n",
    "for j in range(len(year_words)):\n",
    "    ls_year_words=list(year_words[j])\n",
    "    ls_year_words.append(year)\n",
    "    words.append(ls_year_words)\n",
    " \n",
    "words[2:13]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>词汇</th>\n",
       "      <th>词性</th>\n",
       "      <th>年份</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>主持人：</td>\n",
       "      <td>名词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>中国</td>\n",
       "      <td>名词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>中央电视台</td>\n",
       "      <td>None</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>！</td>\n",
       "      <td>标点符号</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>主持人：</td>\n",
       "      <td>名词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>中国</td>\n",
       "      <td>名词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>中央电视台</td>\n",
       "      <td>None</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>！</td>\n",
       "      <td>标点符号</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>主持人：</td>\n",
       "      <td>名词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>此刻</td>\n",
       "      <td>代词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>我们</td>\n",
       "      <td>代词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>在</td>\n",
       "      <td>介词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>北京</td>\n",
       "      <td>名词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>中央电视台</td>\n",
       "      <td>None</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>一号演播大厅</td>\n",
       "      <td>名词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>向</td>\n",
       "      <td>介词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>全球</td>\n",
       "      <td>名词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>现场</td>\n",
       "      <td>处所词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>直播</td>\n",
       "      <td>动词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>《</td>\n",
       "      <td>标点符号</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>2017年</td>\n",
       "      <td>时间词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>春节</td>\n",
       "      <td>时间词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>联欢</td>\n",
       "      <td>动词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>晚会</td>\n",
       "      <td>名词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>》</td>\n",
       "      <td>标点符号</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        词汇    词性    年份\n",
       "0     主持人：    名词  2017\n",
       "1       中国    名词  2017\n",
       "2    中央电视台  None  2017\n",
       "3        ！  标点符号  2017\n",
       "4     主持人：    名词  2017\n",
       "5       中国    名词  2017\n",
       "6    中央电视台  None  2017\n",
       "7        ！  标点符号  2017\n",
       "8     主持人：    名词  2017\n",
       "9       此刻    代词  2017\n",
       "10      我们    代词  2017\n",
       "11       在    介词  2017\n",
       "12      北京    名词  2017\n",
       "13   中央电视台  None  2017\n",
       "14  一号演播大厅    名词  2017\n",
       "15       向    介词  2017\n",
       "16      全球    名词  2017\n",
       "17      现场   处所词  2017\n",
       "18      直播    动词  2017\n",
       "19       《  标点符号  2017\n",
       "20   2017年   时间词  2017\n",
       "21      春节   时间词  2017\n",
       "22      联欢    动词  2017\n",
       "23      晚会    名词  2017\n",
       "24       》  标点符号  2017"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_words = pd.DataFrame(words,columns=[\"词汇\",\"词性\",\"年份\"])\n",
    "df_words.head(25)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "6473"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_words.index.size"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 41.6.停用词处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'主持人:\\n主持人：\\n主持词\\n(\\n（\\n？\\n'"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stopwords= open('CCTVSpringFestvialGala\\stopwords.txt').read()\n",
    "stopwords[:20]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "for i in range(df_words.shape[0]):\n",
    "    if(df_words.词汇[i] in stopwords):\n",
    "        df_words.drop(i,inplace=True)\n",
    "  \n",
    "    else:\n",
    "        pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>词汇</th>\n",
       "      <th>词性</th>\n",
       "      <th>年份</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>中国</td>\n",
       "      <td>名词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>中央电视台</td>\n",
       "      <td>None</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>中国</td>\n",
       "      <td>名词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>中央电视台</td>\n",
       "      <td>None</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>此刻</td>\n",
       "      <td>代词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>我们</td>\n",
       "      <td>代词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>北京</td>\n",
       "      <td>名词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>中央电视台</td>\n",
       "      <td>None</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>一号演播大厅</td>\n",
       "      <td>名词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>向</td>\n",
       "      <td>介词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>全球</td>\n",
       "      <td>名词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>现场</td>\n",
       "      <td>处所词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>直播</td>\n",
       "      <td>动词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>2017年</td>\n",
       "      <td>时间词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>春节</td>\n",
       "      <td>时间词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        词汇    词性    年份\n",
       "1       中国    名词  2017\n",
       "2    中央电视台  None  2017\n",
       "5       中国    名词  2017\n",
       "6    中央电视台  None  2017\n",
       "9       此刻    代词  2017\n",
       "10      我们    代词  2017\n",
       "12      北京    名词  2017\n",
       "13   中央电视台  None  2017\n",
       "14  一号演播大厅    名词  2017\n",
       "15       向    介词  2017\n",
       "16      全球    名词  2017\n",
       "17      现场   处所词  2017\n",
       "18      直播    动词  2017\n",
       "20   2017年   时间词  2017\n",
       "21      春节   时间词  2017"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_words.head(15)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3976"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_words.shape[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 41.7.词性分布分析"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>词性</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>名词</th>\n",
       "      <td>1248</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>动词</th>\n",
       "      <td>963</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>代词</th>\n",
       "      <td>315</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>形容词</th>\n",
       "      <td>266</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>副词</th>\n",
       "      <td>213</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>量词</th>\n",
       "      <td>199</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>时间词</th>\n",
       "      <td>181</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>数词</th>\n",
       "      <td>180</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>介词</th>\n",
       "      <td>94</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>方位词</th>\n",
       "      <td>65</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       词性\n",
       "名词   1248\n",
       "动词    963\n",
       "代词    315\n",
       "形容词   266\n",
       "副词    213\n",
       "量词    199\n",
       "时间词   181\n",
       "数词    180\n",
       "介词     94\n",
       "方位词    65"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_WordSpeechDistribution = pd.DataFrame(df_words['词性'].value_counts(ascending=False))\n",
    "\n",
    "df_WordSpeechDistribution.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>频数</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>名词</th>\n",
       "      <td>1248</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>动词</th>\n",
       "      <td>963</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>代词</th>\n",
       "      <td>315</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>形容词</th>\n",
       "      <td>266</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>副词</th>\n",
       "      <td>213</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       频数\n",
       "名词   1248\n",
       "动词    963\n",
       "代词    315\n",
       "形容词   266\n",
       "副词    213"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_WordSpeechDistribution.rename(columns={'词性':'频数'},inplace=True)\n",
    "\n",
    "df_WordSpeechDistribution.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3965"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_WordSpeechDistribution['频数'].sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>频数</th>\n",
       "      <th>百分比</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>名词</th>\n",
       "      <td>1248</td>\n",
       "      <td>0.314754</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>动词</th>\n",
       "      <td>963</td>\n",
       "      <td>0.242875</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>代词</th>\n",
       "      <td>315</td>\n",
       "      <td>0.079445</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>形容词</th>\n",
       "      <td>266</td>\n",
       "      <td>0.067087</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>副词</th>\n",
       "      <td>213</td>\n",
       "      <td>0.053720</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>量词</th>\n",
       "      <td>199</td>\n",
       "      <td>0.050189</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>时间词</th>\n",
       "      <td>181</td>\n",
       "      <td>0.045649</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>数词</th>\n",
       "      <td>180</td>\n",
       "      <td>0.045397</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>介词</th>\n",
       "      <td>94</td>\n",
       "      <td>0.023707</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>方位词</th>\n",
       "      <td>65</td>\n",
       "      <td>0.016393</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       频数       百分比\n",
       "名词   1248  0.314754\n",
       "动词    963  0.242875\n",
       "代词    315  0.079445\n",
       "形容词   266  0.067087\n",
       "副词    213  0.053720\n",
       "量词    199  0.050189\n",
       "时间词   181  0.045649\n",
       "数词    180  0.045397\n",
       "介词     94  0.023707\n",
       "方位词    65  0.016393"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_WordSpeechDistribution['百分比'] = df_WordSpeechDistribution['频数'] / df_WordSpeechDistribution['频数'].sum()\n",
    "df_WordSpeechDistribution.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false,
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Text(0.5,1,'2017央视春晚主持人【主持词】词性分布分析')"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAccAAAFNCAYAAAB116QMAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzt3XmYHWWZ9/FvZyEB0sjWyChLlJBfQAwIjBsIGYNGSRQGFSIKoqOiIxkVFHFLABGRQecFRGBABAMIBJcMi8oioKC8bKKOyK2IvGwKQZGELUun3z+e55BKpdekz6nT5/w+19VX96l6TtVzd5+uu56lqjp6enowMzOzVUZVXQEzM7Nm4+RoZmZW4uRoZmZW4uRoZmZW4uRoZmZW4uRogyZpfUldgyw7W9JBw7jvfQa7717eO0XSlyRNGOL71l+b/a2r/HveuIr9Nmg/J0p6Xf55lKRxkjqaeb+Sxkvatr41HFQ99pakquvRDsZUXQEbUbYDfitpu4i4f4CyWwPHSLo+Ip6Q9FngOODZQplRwPKI2GwQ+54PHApc21cBSVuy+mf6kYjoAY4HlgLPDGI/tW3tAPxU0vSIuGcQ5TcA7gFmRcT/ltY9AvxbRPx4kLvfB/gfSVtFxCODrXNhf+cD7yst/lhEfLOf9zQy3g7gDEm7A9sDvwJWSFrZS7mNgA0i4rm87eOAuaVy74+I8weq87rsF9id9Nlb4wRC0knAdhHxrv52Lmk08G7g0ohYLmkOsCgiLhlE3Wu+DgRw8GDfsLb7lXQdcEtEzOtl3abAnsABwGci4rEhxDAiODnaUDydvz8+iLJnAv8ObAk8ATwPXBwRh9UKSNoJuKb4JkkbAn8F/gF058UdwIuBcyXVLswdA3QCB0TE9XnZTcAy4ClgD6BT0kzgXcBfgD/nk+5FwJ4RsbSvykfE7yVdTUpSu0XEUwPE+xywLat+R0VL89dgLc/fB/N7Hoxv9ZcYoeHxfg2YA+weEbcBG/RWSNJWwEOkz07NccBrgTcPUL/h3u/zxdf5s7sAeCXpd/EcAzsYOAv4KfBo3t6XJf0wIp4vF849HcsiYll+vTvwKnpJjJJGAeOA5/MJ4Vrvt+BZ4MnCPt4OfJ504rsJ8GfgT8C/AENJ8COCk+MIIGlX0od7F1LS+HpEnJTXHQScCGwMnAN8LiJWlt5/NvCXiDi2sOwB0sGt7IWzcElHAKf3UmZJqWdnp4j4XX7PKGC9iHha0s4RsTiXqR3wkTQZ+GN+uVpdI+IZUtIr1v/VwAURsUMvdSlaBsyJiBtzEt0VOBvYLSLuytv6PvCrcmLMLaExuZ61g8uRwH8D20j6Y6H4WFKL9/kc79i8b4BuSeOArwIH5uUvBb4raSkwHnh7RPzffuJYDvRExPJ+ygzWX4GjygurjDf3JOweEffWPi/A0tpBPXd1blAo31P4eaWkfye1Wtcbyi9ibfebu5tX5FgPBa4k/S8ujogVkrrJn+O83dHlv52kMcDngP8TEY/mxd8m/W1OAj5RKr8xOTH10ot6bz89qy8DHljb/ZaszHHXvDgv2xV4rJck3FKcHJucpI2Aq4BzgbcDrwMuk3QXqZvwItIH/MfAt4AjgNPye0cBJwMfJp1xF01l9THnbYCbgRsKy54F7oyI3fP2BPweGBsR3XnZClY/a34lcLuk54AOSZ3Ay0v7/hlwbN5fbzHvR/rHrXWDbg30SLqjUGwUcE9EvLewbBmrexvw+UJi3IrUZfmhXnZ7IvBxUrKoJewO0gF0v0K5DlJyOBo4BdgR+G1h/YPAr4HXAEdFRLek+4APklq26zNwK3JD0u9uw3yysC6+0kcrsNJ4I+Le/OP2+f1LC70CHaSkPr23gCLiT5K+DRzed9i9W8v9/ob0eQR4E/CS/PXqwnuRVOvK/hYp/qJjSK2t2nbIifVw4HpJd0XEdwrlnyJ17S7LX28ArgN2IPWC
dA6yK3Oo+0VS7URmEjBV0mE5pmeBZyLir4PY74jn5Nj8dgTmR8QX8+sfSLqbdDCaClwXEd8AkPRpUrI8LZf9IiDg++WNFlp05PeeDnwzIv5fYXF5LKaTdLbcXVr+QrmI+DX5jF7SJ4H9I+LPpTPdpaxqOa4hIhYCC/M2NgPuJXWDRl/vycpnsp8H/pB/L5BaBeOAO3N9joiIK/O6o4BPFs+GJe0J/Dwixpd3lE88AP5ASv7LgIeBfyYdYJeVz6zz62fz+zv6OfOutei3Io0vra3lwIV9rGt4vKSD7TdJn4+tImK7/DddY3/5PVv1E9vFDDI5Stp+bfcraSwwEbgvL/o26f9qW2BmRFwt6QvApIg4LI/vjSltb9f8nvdExJLiuoi4SdLHgfMkbU06mVmZf3dLCnU4FTg7nxjMBr4A7DRA3EPeb149h/S/9FNSd+kPSH/jvfrYzzhg5TD1dDQNJ8cmFxG3ArfWXucP4iTSQfMwVu/2vBPYVtJmEfE34KKIOE5pgkafJL0CmEU6CPRna9I/yYAkvYzUOjwwLyrOChwFPNbLciT9EJhCGhfpAP6J9Dm9NCe0jfK65aSD2+MR8YbCJk6Q9ET+eQWwbUR05G3/Fdg7Im6VdCOpRQRALwm/X7UDSR4P+nPePqTfz2Tgfkm1yUfrkcby1iMllfWBQ+h7nOY1+fss1i05/jEi/t5H/Rseb54A8iZJ04ALatuWdCSpS7fm8YjYdYAq/WoIdf/jOux3F1JLrdb6voF0kngmvUwOy7/XF363kiYC/5O/fpVfl10FvAiYB8ySdGiuc81/kro0v5BfL2OA3od12W9E3CbpX4EJpDHKP+cu6VHA3pL+kd/fmevRAXwMOK+/Oo00To4jz5GksYgfks6G/1Rbkbu0lpBaHH+LiPt638QaPkWatPEUvDC7bXQv5bYv7q9gVE7ayyKiJ7f2riAdUPbJXcDj8rbHkCbpPErqmiqPG707Vs1MPBk4CHh5RDydl32HdHKwV0SsYE0XA3eRugaLraKNSQeY4kFntZaOpE3ystp218/Li5eAjAW6yy3vgnnAkohYYzwsd8FNAh6hdFJQKDOG1K33TeAISaf2Eedg9Duhp+J4i62MscAlEfGp3Grrtbu95BlSj8VQL0cb6n6XkpITkFrDkm4lTXIZnYcVXtBLK+pCUs/HPaxqffbmKmBv0gnlw4XtfYzU/Q3wt3xC0kH6n6vtuwN4KCImFra3TvsldaP3kCa2Ha80m7kTuCoi9s91uxf4RAx+FvaI4uQ4guRukrnAfhGxLP9zlGebPUs64xvsNl8MzCad/dfMAi7L2+4pnSmOyge92pn006Sz+HHAbnndD0kThBaTkt8ZwHdJY5M7kw5s44G/U2o9RcRzuQvuy8Angem1xJh9mTTmtVDSeyLiH6zuntwyhHTweybXfyzpYPqnvG40ax5YLyEdDGoHndoJQvGgsR5p/Pc/agvyWflH8stdSEltA+C5PrpOx7L6RIei/Ui/yyOBaaSuwzP6KDuQfxpgfTPEC4WW1hBMZN2v0x5wvxHxG+A3knYpLHtMUm2W6gsKY45zgG/kn99Oauk9T7qk6AOkhPKKwvu+DTybe4neUlj+KdJEp9NIXaOb5+X7A18ozAV4C2tOnFuX/b6TNBxzE3AL6ST2K8D9pP/ZtuDkOEJI2hy4HPhqRNQuf3iC1Aor2oihXTbwXuCXEfFQbUEe8xtX2v8k0sSE15Mm1MyMiFt6qed2udwZpNmznwY2jIjazLsTSOOJd0fEFqQDSfH9E0kzTF9N+nyepTWvQ3sO2BcISSeSWr1rXFKQuwAn5O2eTzpAryQdWNZoVUXEjFJdamNwfV6QL2kP4Oesmsj0r7l+f2f1yR4195CSxe7A70rbGgecAJweEUuVrp87VdLCiHiYoZssadvSOPILqo63oNyqHMxx6S0DFxnQ2uy35gbSCd4y0th2bcxxPVYfg18tmShdjlGcWAapp+e6Urk9gC8B
/wbcDbxngPqslujXYb9bkf53T2DVGOOHSeOfp9H337Dl+A45I0A+K/8hqYVWnHX6S9KFuLVyk0nJcSgH0kNIrbr+9j+a9A9zYUTcTeryO0+93MUlIv4UEe8kT0OPiGWFxPgi0kXDZ7HmzFKUZrZeRRqP3D0vfn1E7FT8Io31/CfpZKF40ChPhBibv08hXet4Oun6uF6vb1tLvyBdN/fCDMc83rsbcEJEbFz7yqt3jIj1I1/6UnIiKZF8JW9nPnAbabx1w7WoWwdrzppcV8MWr6RdlG4OMZrU+vwH6eDb7yUauWfh39Y2gLXdb1FEdEfE0l4mIS3rqxs8f77fSZ5sVrA1adZvcTu3AFvH4G5u0K+h7JeUMH9HmgBUq0uQuuhfT2H+Q6tzcmxy+UBwGalL8yPAhpIm5FbGRcB7JL0qD5bPA+6IQU61lrQNqZvzJ/2UGUNqAb6cVdfLzSMlv59I2mII4ZxKOvA/WVwo6SylayKXADtHxKGsmrDzG0kPFL+At5ImT3wMeE2h1TgWuLLQDTw2J/AfAKdGmkk7rCKiJ/KlIiVLSHcI+ngv69Yg6ShSIpsdq1+Y/UHSAesaSS9ZiyoepWG87dkwxbsx6VrIG0k3ZHgA+EhOqC8ldQNC38en95KutRuqdd3vWsstym/nfS4sLO8g/X3LSYqIqE0s67NFm0+atqeXk8212W/uYn1TLxO23gVsRjohbwvuVm1+rwRm5p+LXYEX5G6cM0mtiydJZ8JD6W6aATwQEWv8Y8ILs1jPBjYHZuTkRe7y2w/4Hul2cseRLjcpThcfk+tT+0c8hXQjgvm5hbux0jWco0mTGxbkbRcnGQBMLY8rSrq8tr505v4Y6bZlt0o6hjQutYDULVWb6ddDuu7rAUkLcny1cZXFrD5ho9YlW24h164H/Fuka8Z2BmpjOctzvR7MYzfLc7xb5/WrHXSUbsN1KqlF/daIWK3rKyIeVppleSPwa0nHAufkLuPBWB+Yr3RbuOV5n+tXFW82kzTuPCsfjIvxPg18Q+keqPvmOr4gnyCcMsjYh22/pPhrPRFjSZ/bNVqO+SR1POlONbUbA+xAurnCi0j/R7UbJ0wlzUweR7p+uC/jSq/XI534dZBa8lNJY/GrWdv9FhLjWNKko61Jv/OLSv+Lo2jhBpaTY5PLEwL6vDlyRHxa0rdIA+i3FM42i2UO6+O955BahavJSetiUqI9B/h0eUwvT0qYRhpv+RJwrKRdYtVdOMYD4/OZ6yWkmwnU/oEfJCX62qSe20gH/6Lxef2vexnH6iIlvHI8byy8/A7pIu9LSRMRauNAtwO3SlpOmqzwH6TfXXn/NU9RuONINpp0YHkl6VKL3Ukzfk+JwoXZEXEDgKTPkSbYXEbhUph8kL2WdBD65+jjnqaRrhPdlXT7sw/n2PpLjr9lzd6AA0i/C6go3oITSXd56i8hfCrX82Ol5R9gzcs4Bnv/2XXZ7zhWJamDSf8XzxfHw3OPxSjSCckU0uSvfyHdInE+cGQtueQTzFNz2cPKY4QlvyP9/VarS6SZswcCT0fpHrzDtN9azNuT/o6f72X9kO5SNJJ09PS09B2AbC1JmgU8nMcYByq7AWlCwm/6WD8qSre0a4T+JqM0C0n/BDwZ/d/jslh+/GDLWnNQuoH82kyoqnS/Sjfyfy4Gvs9uS3JyNDMzK2nZ/mIzM7O15eRoZmZW4uRoZmZW0jazVVes6O558slnBy7YAjbZZAPaJVZwvK2snWKF9oq3GWLt6urs80qAtmk5jhnT2320W1M7xQqOt5W1U6zQXvE2e6xtkxzNzMwGy8nRzMysxMnRzMysxMnRzMysxMnRzMyspG0u5bhlv3cM27Ymn3v+sG3LzMyaj1uOZmZmJU6OZmZmJQ3pVpXUUX4o6BDfPx24g/wA0nXZlpmZ2UAa1XK8Jj8bbA2STpfUlX/+UH4Sd9kEYB7pCdZXS7qy8HWvpDfVr+pmZtZu6tJylDQa6IiIFXnRfwM7
kJ8KLmkU6en22wLTgU9I6iA93f1dwC/zayKiJyIWSnoIeBQ4G6htdzGwF7C8HnGYmVl7qsvDjiW9BZgLrCS19spPiB9Fagm+DXgur/8AUKxMB/AJYEdgH+Aa4Abg9Xm7sCo53hwRN/ZXp1v2e8ewBbrHwu8N16bMzKw6fd54vC4tx4j4MfBjAEm/iIg9y2UkCXgPcHpEXCRpRURcmtd9DrgkIu4HfivpEWBGRCyQNAuYnDczm5QcG2rRoiWN3uWQdHV1Nn0dh5PjbV3tFCu0V7zNEGtXV2ef6xoxIWeipJsLr7cC3g68FTgW2Cwv/5CkW4EHgcOBr5W2052/bxoR0ySdQhtdp2lmZo1T1wk5kl4C3BURe9a+gP8FHo+IrwK/KhSfT2oJvgL4ZUQsHcQufCmKmZkNu3onl8OBH5aWbQY83kvZq0ityhnA1aV1HcAUSXOBLSVdB+wPjB3e6pqZmdWxWzJfkvFWYI/Cso2A8RFRm1DTkZdfRJq5CnAg8KikDwPnR8S5pEk9Ao4GzoqIx/P7tiFN2Lm+XnGYmVn7qdelHNsCJwP7RcTyvOybpJmm3ygUXR9YD3hf4bKP4nZqj4peAMyPiMWlIjOBO4HbhzcCMzNrZ3W5lANA0pjeEl6FeqqeGdUozTALrJEcb+tqp1ihveJthli7ujr7vJSjbmOOTZYYzczMBs2zPc3MzEqcHM3MzEqcHM3MzEqcHM3MzEqcHM3MzEqcHM3MzEqcHM3MzEqcHM3MzEqcHM3MzEqcHM3MzEqcHM3MzEqcHM3MzErq9jzHZnPgpR9t6P7OeOPJDd2fmZkNH7cczczMSpwczczMSpqqW1VSR0Ss8fRlSdOBO4DFAL2VMTMzGy5NkxwljQXulHR0RPy4tHoCMA+4ADhJUndh3SRgTkRc26CqmplZi2ua5Ah8BfgJMEfSfRFxX21FRCyU9BDwKHA2sCKvWgzsBSxvdGXNzKx1NcWYo6SjgG2Bo4GPApdIOjCvO0jSOcB2QA+wsrKKmplZW6i05ShpY+DrwKbAF/NY4oOSjgc+nr8fQupOnRERCyTNAibnTcwmtRybTldXZ1vvv9Ecb+tqp1ihveJt5lir7lbdAngA+CRwE7BLXn4S8ApgD+BXwGuB2jjjphExTdIpVF//Pi1atKSyfXd1dVa6/0ZzvK2rnWKF9oq3GWLtLzlXmlwi4g/A8ZJeAjxWWLUstyJvBpDU1yaaolvYzMxaS7Mklz2Au/pZ3wFMkTQX2FLSdcD+wNhGVM7MzNpL5clR0ouB44Fv9VNsKiDgfOCAiNgnIiaRZqnuCCytdz3NzKx9NMOY3X8DJxcv3QA2kDQqImozUxcA8yNicem9M4E7gdsbUE8zM2sTHT091d5spq+74tRBT9WDv43SDAPdjeR4W1c7xQrtFW8zxNrV1dnR17rKu1V9KzgzM2s2lSdHMzOzZuPkaGZmVuLkaGZmVuLkaGZmVuLkaGZmVuLkaGZmVuLkaGZmVuLkaGZmVuLkaGZmVuLkaGZmVuLkaGZmVuLkaGZmVtIMj6xqiLcdtbDqKgy78455Y9VVMDNrSW45mpmZlTg5mpmZlYyIblVJ04E7gMXgZ0CamVl9VZ4cJR0P3ADsAywBzgAuB/aNiO5cbAIwD7gAOElSd2ETk4A5EXFt42ptZmatrNLkKGkCqTX4OmALYEtgW+CZiOiWNAogIhZKegh4FDgbWJE3sRjYC1je6LqbmVnrqnrM8UXAZsBngSnAE8ARwCRJPwMeBj4t6RxgO6AHWFlRXc3MrE1U3a3aDUwEvgaI1HKcCnweuA84PCK+KmlPYEZELJA0C5ic3z+b1HJsS11dnWu1rhU53tbVTrFCe8XbzLFWnRzHAMcBhwBfJ3WPngjsBiwD7i+UrY0zbhoR0ySdQvX1r9SiRUt6Xd7V1dnnulbkeFtXO8UK7RVvM8TaX3KuOrlsC5wAbA/sDOwCvAz4fl7/owHeX3W3sJmZtaBKk0tE3AJcBlwP
XAhcFBHLgbuA/YHbc9EOYIqkucCWkq7L68c2vtZmZtbqmqHldRYpOX4B2EnSy4BXAEuBXXOZqaQxyfOBAyJin4iYROqG3TGXNTMzGxZVX8qxCXAmaWzxtaRE923gU8BjwOWSDgYWAPMjYnFpEzOBO1nVwjQzM1tnlSbHiHhS0nsjYgWApNuB6bWL/yW9tr+74UTEmQ2qqpmZtZGqJ+RQS4z55x5WzUod1tvEXfG1/SqfGWVmZiNDM4w5mpmZNRUnRzMzsxInRzMzsxInRzMzsxInRzMzsxInRzMzsxInRzMzsxInRzMzsxInRzMzsxInRzMzsxInRzMzsxInRzMzs5LKbzzeKG87amHVVaiL8455Y9VVMDNrOW45mpmZlTg5mpmZlVTerSppM+DNEfFdSWOBFb09x1HSdOAOYDEM77MezczMiipJjpI+DiyNiLOAp4GTJP0O+CzwYkkrc9HdgIkR8RQwAZgHXJDLdxc2OQmYExHXNiwIMzNrWXVLjpL2Bi4G/ghMiYgtC6tXAMsljQY2BY4E/hoR7y5t40ZgGUBELJT0EPAocHbeBqSW5F7A8nrFYmZm7aWeLccVwA8i4ghJt0v6ALBTXr4zsDL//OGI2EPST3KyrHlL/t4j6SBgH+Aa4MH8XjMzs7qoZ3LsBv5V0k7AFhFxnqStI+IhSR8BngcuBd5fq0tETIfUYoyIFZIAiIhLJT0CzIiIBZJmAZPz+2aTWo5tqaurc0jLW5XjbV3tFCu0V7zNHGu9k2Ot5XirpPWBK3J3a2+mSLou/7xzP9sE2DQipkk6hSaYVFSlRYuWrLGsq6uz1+WtyvG2rnaKFdor3maItb/kXM9LOYrb7oiI54AzgN37KP/7iNgnIvYBfr2W+zEzM1tn9Wx1jWFVt+pLASLiHABJ2/dSfpdiy1FSuW4dpNblXGDLXHYicGY9Km9mZu2rnslxNKu6VeeV1nXk76NqP0fE5uUNSFqPVZNvpgICjgbOiojHc5ltgB2B64c9AjMza0v1TI53AAEQEcfVFkp6F/AfwAdJ1y6O6+3Nki4iTdJZlhctAOZHxOJS0ZnAncDtw1p7MzNrW3VLjhHxLPBsL6uuBP4nIpbm16/tYxMfiYgXRmtrLcVe9uNuVTMzG1YNn+mZJ+YMptywTmO64mv7VT4zyszMRgbP9DQzMytxcjQzMytxcjQzMytxcjQzMytxcjQzMytxcjQzMytxcjQzMytxcjQzMytxcjQzMytxcjQzMytxcjQzMytxcjQzMytp+I3Hq/Khq++qugoNc86+u1ZdBTOzEc0tRzMzsxInRzMzs5IR0a0qaTpwB7AYICJ6qq2RmZm1ssqTo6StSInv3vIq4J8j4mFgAjAPuAA4SVJ3odwkYE5EXNuI+pqZWeurPDkCS/tZ1w0QEQslPQQ8CpwNrMjrFwN7AcvrWkMzM2srzZAcu4GLgXtKy3cAlkk6CNgHuAZ4EFjZ2OqZmVm7qTQ5SnofcDDwFPCSXopcCJxG6k6dERELJM0CJuf1s0ktRyvp6uqsugoN5XhbVzvFCu0VbzPHWmlyjIgLJN0GfBX4GbA90AH8gZT0joyI30vak9zFCmwaEdMknUJztHyb0qJFS6quQsN0dXU63hbVTrFCe8XbDLH2l5ybIbmMAnYDNgK2ICXHyaREOXoQ7zUzMxtWzZAclwDnAX8DXkVKeHcCm5Mv3SAlzCmS5gJbSroOmAic2fDamplZy2uG5PhGYBqp27TWctyalCTvIE3CmUq6tONo4KyIeBxA0jbAjsD1Da+1mZm1rKon5OwKHEKakAOwASk5Pp1fz5H0CLAAmB8Ri0ubmElqZd7egOqamVmbqHpCzl3A9HV4v7tVzcxs2A0qOUoaExErSss6gH1Gyp1pztl318pnRpmZ2cgw2NmeNwBIGivpysLyo4a/SmZmZtXqt+UoaQLp1mzPShpLSqZjCj/7BuBmZtZyBupW/QHwHLAz8D3SZJldCj87OZqZWcsZ
qFv1AOBk4NcR8Xbg7bWfI+Jtda+dmZlZBQZKjhNIl0uskPQj4E2km4QjaRQwtr7VMzMza7x+k2NE/IV04++/A1cB90bEBXndSuDIutfQzMyswfocc8yTcf6bNK64EbArsKukWpFRwHqkp2qYmZm1jP4m5DwLfBF4HvgY8FfgLuDPeX0HMK6utTMzM6tAn92qudt0IrAd6QbgT5Huebpd/nopqxKlmZlZyxjoUo5vAPNJrcSt8rKX5+87km4M/l/1qZqZmVk1BkqO95EeQtwBRC5fm6W6gjST1czMrKUM5t6qG5GS4yWklmIHsF9ETAbOqmPdzMzMKjGkp3JExNEAknapT3XMzMyqN1ByHJ3L9AB/lHQZeZaqpM0j4ol6V3C43HnNp6uuQsM8WHq9zavmVlIPM7ORaqDkeAzwCNATEVfUFkr6IjC+nhUzMzOrSn83AdgIOBS4ENhO0lSgO6/eAVgoae+IeLr+1TQzM2uc/lqOuwM/i4i7Jf07cEFh3U+BqaTWY92To6TppMlAiwEiwk8DMTOzuukzOUbETwsvr4uIW0pFbl7XnUt6GSkJLy8sHgvcEhGPSjoX+ArpBujzSAn6JEndhfKTgDkRce261sfMzAwGOVs1Ii6r0/6Xk25PV2wJdgP/IunpvK47IhZKegh4FDibdI0lpJbkXqyeXM3MzNbJkC7lGG4R8bCkI4B/AhYBmwKPAXcCm+RiB0maBFxDmoi5soq6jmRdXZ1VV6Hu2iHGonaKt51ihfaKt5ljrTQ5Zj3AEuAf9F6fS0m3rpsREQskzQIm53WzSS1H68eiRUuqrkJddXV1tnyMRe0UbzvFCu0VbzPE2l9yHuhhx41wG2myzcPA3fl1b2rjjJtGxLT8nmZI7mZm1mKaITl+jtRq/HD+/tkhvLcZ6m9mZi2mGZLLRsD+wEvy9xeRZqwWnxXZAUyRNBfYUtJ1uezYBtfVzMzaQDMkxz9GxGHAHfn7HyPiuxFxdqHMVEDA+cABEbFPREwizVLdEVja2CqbmVkra4Yxu90l3QiQv0/JP88GXkaaxboAmB8Ri0vvnUma2Xp7oyprZmatrxmS4zYRsayX5ZdHxCX552d6e2NEnDnYnez25v+sfGZUozTDLDAzs5Gs8m6yGL0jAAAP2ElEQVTVPhIjEbGit+VmZmb1VnlyNDMzazZOjmZmZiVOjmZmZiVOjmZmZiVOjmZmZiVOjmZmZiVOjmZmZiVOjmZmZiVOjmZmZiVOjmZmZiVOjmZmZiVOjmZmZiXN8FSOhjj+qCuqrsKI9dFjplVdBTOzhnLL0czMrMTJ0czMrKRuyVHS6NLrsfn7OEnb91J+Svk9hXXTJb1IUoekjvrU2MzMLKnLmKOkCcDVkiYDAWwPfB84In+9FDiy9LYPALcBl/eyyQnAPOAC4CRJ3YV1k4A5EXHtsAZhZmZtqy7JMSKelrQvcC4pGZ4SEUdIejnwLmAagKSbgRX5bR3AqyUdkV+/FJgSEd0RsVDSQ8CjwNmF9ywG9gKW1yMOMzNrTw0bc5S0KalV+FFgZV78J2A6cEn+PgO4GXgn8NP8voMknQNsB/QU3mtmZlYX9epWHd3Ltp8D/h14CrhV0uuBi4GrgfOAw4G/A1cB3wPOjIhu4FJJjwAzImKBpFnA5LzN2aSWo9VRV1dn1VUY0Eio43Bqp3jbKVZor3ibOdZ6Xef4OuDrwNbAQlKr71uk5Hg18MmIeF7SfcC/AvsCbwA2BI4ntSI3K22zNs64aURMk3RKHetvBYsWLam6Cv3q6ups+joOp3aKt51ihfaKtxli7S8516VbNSJuBg4BLgT2A35MSno/yd9/I2kUsBQ4hdQSfDfwHuD9wHzS+OPGA+zKl6KYmdmwq2fL65WkMcWaJ4APAo8D1wJXkFqYzwGvAd6cy40H/kFqUS4nJdYOYIqkucCWkq4DJgJn1rH+ZmbWpuqZHN8NHFV7ERFPSNoOuBI4MiJ+BiDpOODLEXFHfj0R+FRE
HFHY1lRAwNHAWRHxeC67DbAjcH0d4zAzszZTrwk5uwNLI+IBSV3A3yXNBj4CvCMiHiwUHw2cLanW+Twe+E1pkwuA+RGxuLR8JnAncPuwB2FmZm2ro6enpy4bljQqIlYWXo8GeorLGqyn6sHfRmmGge5Gcrytq51ihfaKtxli7erq7POOa3XrVi0nwXxZhpmZWdPzbE8zM7MSJ0czM7MSJ0czM7MSJ0czM7MSJ0czM7MSJ0czM7MSJ0czM7MSJ0czM7MSJ0czM7MSJ0czM7MSJ0czM7MSJ0czM7OSej7Psancst87qq5Cw/yh6goUTD73/KqrYGY2ZG45mpmZlTg5mpmZlTRFt6qkjojo86nLkqYDdwCLAfora2Zmtq6aIjkCX5Z0M3ATcBFwCPC6iLgmr58AzAMuAE6SVHxw8iRgTkRc28gKm5lZ62qWbtU3AjdGxDPARGA5cLKkrQEiYiFwIfAocDZwVv46OS9fXkGdzcysRVWeHCW9CXg0Ip7Ni1ZExPPAvsBESQdJOgfYDugBVlZUVTMzaxOVdqtKGg18Gbhf0muAfwa2lnQ58Czw64j4mqRHgBkRsUDSLGBy3sRsYK8q6m6D09XV2VL7aRbtFG87xQrtFW8zx1r1mOPhwI8AAS8FArgd+ExE/KlUtjbOuGlETJN0CtXX3wawaNGSuu+jq6uzIftpFu0UbzvFCu0VbzPE2l9yrrpb9RzgdICI+H6eVHMlMGOQ76+6/mZm1oIqTS4R0dtEmu8Bh0kaJakjL+sApkiaC2wp6Tpgf2Bsg6pqZmZtpBm6JTvyFwARsUjSFcCpwGOSAtiC1PV6NHBWRDwOIGkbYEfg+obX2szMWlYzdEuOy18viIgvAX8DDgNuARYAe0fEg7XEmM0E7iSNU5qZmQ2LyluOEfEwqYu0vPxY4NgB3ntmfWplZmbtrPLk2Ch7LPxe5TOjGqUZZoGZmY1kzdCtamZm1lScHM3MzEqcHM3MzEqcHM3MzEqcHM3MzEqcHM3MzEqcHM3MzEqcHM3MzEqcHM3MzEqcHM3MzEqcHM3MzEqcHM3MzEra5sbjB1760aqrYGZt6Iw3nlx1FWwtuOVoZmZW4uRoZmZW0pTdqpLWi4hlhdfTgTuAxQAR0VNV3czMrPU1ZXIETpP0nYj4RX49AZgHXACcJKm7UHYSMCcirm10Jc3MrDVVmhwlTQCuBZ7JdbkmIk4EVgAvPMo+IhZKegh4FDg7r4fUktwLWN7IepuZWWurNDlGxNPA62qvJb1c0neBXYCXShpHai3uA1wDPAisrKKuZmbWPqpuOR4DTAdGA93A4oh4h6RzgeOBmcAyUoKcERELJM0CJudNzCa1HM3MmlJXV2ddy49kzRxr1S3HkyRdDJwKHEhKkACbA4uATuC+vKy2btOImCbpFJp3zNTMDIBFi5YMXCjr6uocUvmRrBli7S85N8OlHIcC3wT+DTgkL9swIp4jJcdn+nlvM9TfzMxaTKXJRdJ2wMHANoCAuZKmAr/ORTqBp4EOYIqkucCWkq4D9gfGNr7WZmbW6qrulhwNXA78FbgB+BlwOvCBvH5L0ozUN5CS59HAWRHxOICkbYAdgesbW20zM2tlVY85/gGYW3st6UDgPOBBSTcAf46IByQtAOZHxOLSJmYCdwK3N6rOZmbW+qpuOZadHBG1SzX+pbaw1lIsi4gzG1IrMzNrKx09PW1zJ7aeqmdGNUozzAJrJMfbutopVmiveJsh1q6uzo6+1nm2p5mZWYmTo5mZWYmTo5mZWYmTo5mZWYmTo5mZWYmTo5mZWYmTo5mZWYmTo5mZWYmTo5mZWYmTo5mZWYmTo5mZWYmTo5mZWUmzPZWjbt521MKqq2BmZsPgvGPeWPd9uOVoZmZW4uRoZmZW0jTdqpI2A54sPOy4uG46cAewGCAi2uYhlGZm1nhNkxyBC4G5wO29rJsAzAMuAE6S1F1YNwmYExHX1r+KZmbWDpoiOUp6D7A18BVJAGOBHYCd
IuLxiFgo6SHgUeBsYEV+62JgL2B542ttZmatqvIxR0lvAz5ASnoHRcQ+wN3A+yPicUkHSToH2A7oAdbodjUzMxtOlbYcJXUArwAOAF4FXCXpbuCeiLgKICIulfQIMCMiFkiaBUzOm5hNajmamVmb6OrqrPs+Kk2OeWLNSQCSngSeB34aEZf1Urw2zrhpREyTdApN0i1sZmaNs2jRkmHZTn9JtuqW4ybAm4H3kVqDDwAflvRhYBPgpog4sp9NVN4tbGZmrafqltcmwHrAWcCuEXFsbYWkacBb8ssOYIqkucCWkq4DJgJnNrKyZmbWHqruVr0fuF/SnsCh+XvNJkDt8oypgICjgbMi4nEASdsAOwLXN67WZmbW6qpuOdaMA77TS8txv/xyATA/IhaX3jcTuJPer400MzNbK82SHG8AbiouiIgbgRvzz4/39qaIcLeqmZkNu46enra5E1vPcM1wanZdXZ3DNptrJHC8raudYoX2ircZYu3q6uzoa51ne5qZmZU4OZqZmZU4OZqZmZU4OZqZmZU4OZqZmZU4OZqZmZW006UcZmZmg+KWo5mZWYmTo5mZWYmTo5mZWYmTo5mZWYmTo5mZWYmTo5mZWYmTo5mZWUmzPM+xLiSNAr4J7AwsBT4YEfdVW6t1J2kscB4wkfSg6BOAe4DzgR7gf4GPRcRKSfNID4VeAXwiIm6ros7DQdIWpIdbv4kUz/m0aLySPgu8HViP9Bm+iRaMN3+WLyB9lruBD9Gif1tJrwG+GhHTJE1ikDH2VbaKGAarFOsuwOmkv+9S4NCIeEzSh4DDSbGeEBFXStocuBhYH3gUeH9EPFtFDK3ectwfGB8RrwOOAb5WcX2Gy3uBv0XEG4C3At8Avg58IS/rAPaTtCuwN/AaYDZwRkX1XWf5IHo28Fxe1LLxSpoGvB7YgxTP1rRuvPsCYyLi9cDxwJdpwVglHQ2cC4zPi4YS4xplG1n3oeol1lOBORExDfg+8BlJWwL/QfqMzwC+ImkcMBe4OMf6K1LyrESrJ8c9gR8DRMStwO7VVmfYLAC+WHi9AtiN1LoA+BGwDyn+ayKiJyIeBMZI6mpoTYfPKcBZpLNJaO14ZwC/BX4AXAFcSevG+wdSvUcBGwHLac1Y/wQcUHg9lBh7K9vMyrHOjoi7889jgOeBVwO3RMTSiHgKuA+YSuGYTcWxtnpy3Ah4qvC6W9KI70qOiKcjYomkTuBy4AtAR0TU7gW4BHgRa8ZfWz6iSDoMWBQRPyksbtl4gc1JJ3LvAj4CXASMatF4nyZ1qd4LnAOcRgv+bSPie6TEXzOUGHsr27TKsUbEXwAkvR44Avgv+o61uLzSWFs9OS4GOguvR0XEiqoqM5wkbQ3cAMyPiIuB4hhEJ/AP1oy/tnyk+QDwJkk3ArsA3wG2KKxvtXj/BvwkIpZFRJDOtIsHiVaK95OkWCeT5gZcQBpnrWmlWIuG8v/aW9kRRdJBpJ6fmRGxiL5jLS6vNNZWT463kMY0kPRaUlfViCfpxcA1wGci4ry8+Fd5rArSOOTPSfHPkDRK0jakk4MnGl7hdRQRe0XE3nnM4m7gUOBHrRovcDPwFkkdkl4CbAhc36LxPsmqlsLfgbG08Ge5YCgx9lZ2xJD0XlKLcVpE3J8X3wa8QdJ4SS8CdiBNNnrhmE3FsY74LsYB/IDU4vgFaSD7/RXXZ7h8DtgE+KKk2tjjx4HTJK0H/B64PCK6Jf0c+CXpROhjldS2Po4CzmnFePOsvb1IB5BaHH+mNeP9L+C8HMd6pM/2HbRmrEVD+fyuUbaKCq8NSaNJXeUPAt+XBHBTRMyTdBop+Y0CPh8Rz0s6Abggz2R9Aji4oqr7kVVmZmZlrd6tamZmNmROjmZmZiVOjmZmZiVOjmZmZiVOjmYtLM8WrP08Kt+JxswG4NmqZi1E0huAbUn3tbyCdFH9i0k3rf4r8H9It+i6gXRrriWke3heDuwbEd0VVNus6fgs0qy1fJF0R505wLER
8Rbg28DHI2Jf4Beku5C8jnSXoe1JyfSZfJ2dW5dmODmatZraPS3/KyI+KunjpDuOHCzpfGCz/PVZYArpQusjgEmSfgY8TOvcoN9srblb1axFSHoPKdEtJ90RaiXpaQdfAp4l3X3mctKdVwIQsIz0NIS5uezhEfGJhlferMm0+u3jzNpGRFwkaQKrWo/3ke5D+3LgMNK9SwGOAw4hPSdwOXAi6bFIy4D7MTMnR7NWIWlT0oNwzwN2BGaRbuw9nvQczPHAaOBM0ljjzqSnnLyM9BBaSM/QM2t7HnM0ax2jSY+AgvQcyM+TWo87AfcAfyHdAPoy4HrgQuCiiFgO3AXsD9ze4DqbNSUnR7PW8XdgEukJLQdHRO0JDjNJE282Bh4iPVfvetJDsneS9DLgFcBSYNcK6m3WdJwczVrHq4EZwIERcWN+8vo3gUMj4mrgAeAzwHdJM1VfSxp//DZwEvBO4AxJ21VQd7Om4tmqZm1G0piIWJF/7iA9VLe79joifFCwtufkaGZmVuJuVTMzsxInRzMzsxInRzMzsxInRzMzs5L/D1lA/K2z6ypDAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x24de2549048>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.subplots(figsize=(7,5))\n",
    "df_WordSpeechDistribution.iloc[:10]['频数'].plot(kind='barh')\n",
    "plt.yticks(fontproperties=font,size=10)\n",
    "plt.xlabel('频数',fontproperties=font,size=10)\n",
    "plt.ylabel('词性',fontproperties=font,size=10)\n",
    "plt.title('2017央视春晚主持人【主持词】词性分布分析',fontproperties=font)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 41.8.高频词分析"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>动词</th>\n",
       "      <th>动词计数</th>\n",
       "      <th>名词</th>\n",
       "      <th>名词计数</th>\n",
       "      <th>代词</th>\n",
       "      <th>代词计数</th>\n",
       "      <th>助词</th>\n",
       "      <th>助词计数</th>\n",
       "      <th>副词</th>\n",
       "      <th>副词计数</th>\n",
       "      <th>形容词</th>\n",
       "      <th>形容词计数</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>要</td>\n",
       "      <td>23</td>\n",
       "      <td>中国</td>\n",
       "      <td>27</td>\n",
       "      <td>我们</td>\n",
       "      <td>100.0</td>\n",
       "      <td>着</td>\n",
       "      <td>16.0</td>\n",
       "      <td>不</td>\n",
       "      <td>22.0</td>\n",
       "      <td>好</td>\n",
       "      <td>23.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>到</td>\n",
       "      <td>22</td>\n",
       "      <td>福</td>\n",
       "      <td>27</td>\n",
       "      <td>我</td>\n",
       "      <td>34.0</td>\n",
       "      <td>过</td>\n",
       "      <td>12.0</td>\n",
       "      <td>最</td>\n",
       "      <td>13.0</td>\n",
       "      <td>新</td>\n",
       "      <td>19.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>请</td>\n",
       "      <td>20</td>\n",
       "      <td>朋友</td>\n",
       "      <td>22</td>\n",
       "      <td>这</td>\n",
       "      <td>32.0</td>\n",
       "      <td>之</td>\n",
       "      <td>11.0</td>\n",
       "      <td>更</td>\n",
       "      <td>12.0</td>\n",
       "      <td>大</td>\n",
       "      <td>15.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>来</td>\n",
       "      <td>18</td>\n",
       "      <td>观众</td>\n",
       "      <td>18</td>\n",
       "      <td>大家</td>\n",
       "      <td>23.0</td>\n",
       "      <td>得</td>\n",
       "      <td>10.0</td>\n",
       "      <td>就</td>\n",
       "      <td>11.0</td>\n",
       "      <td>老</td>\n",
       "      <td>11.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>看</td>\n",
       "      <td>14</td>\n",
       "      <td>航天员</td>\n",
       "      <td>17</td>\n",
       "      <td>你</td>\n",
       "      <td>16.0</td>\n",
       "      <td>地</td>\n",
       "      <td>5.0</td>\n",
       "      <td>还</td>\n",
       "      <td>9.0</td>\n",
       "      <td>幸运</td>\n",
       "      <td>11.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>说</td>\n",
       "      <td>14</td>\n",
       "      <td>观众朋友们</td>\n",
       "      <td>17</td>\n",
       "      <td>您</td>\n",
       "      <td>9.0</td>\n",
       "      <td>等</td>\n",
       "      <td>1.0</td>\n",
       "      <td>正</td>\n",
       "      <td>9.0</td>\n",
       "      <td>伟大</td>\n",
       "      <td>10.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>感谢</td>\n",
       "      <td>12</td>\n",
       "      <td>舟</td>\n",
       "      <td>14</td>\n",
       "      <td>此刻</td>\n",
       "      <td>8.0</td>\n",
       "      <td>连</td>\n",
       "      <td>1.0</td>\n",
       "      <td>将</td>\n",
       "      <td>8.0</td>\n",
       "      <td>欢乐</td>\n",
       "      <td>9.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>带</td>\n",
       "      <td>11</td>\n",
       "      <td>全国</td>\n",
       "      <td>14</td>\n",
       "      <td>每</td>\n",
       "      <td>7.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>再</td>\n",
       "      <td>8.0</td>\n",
       "      <td>美好</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>会</td>\n",
       "      <td>10</td>\n",
       "      <td>字</td>\n",
       "      <td>14</td>\n",
       "      <td>这里</td>\n",
       "      <td>7.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>正在</td>\n",
       "      <td>7.0</td>\n",
       "      <td>深</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>过年</td>\n",
       "      <td>9</td>\n",
       "      <td>神</td>\n",
       "      <td>14</td>\n",
       "      <td>各族</td>\n",
       "      <td>7.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>又</td>\n",
       "      <td>7.0</td>\n",
       "      <td>小</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>花</td>\n",
       "      <td>9</td>\n",
       "      <td>祖国</td>\n",
       "      <td>13</td>\n",
       "      <td>你们</td>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>越</td>\n",
       "      <td>6.0</td>\n",
       "      <td>高</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>祝</td>\n",
       "      <td>8</td>\n",
       "      <td>航天</td>\n",
       "      <td>13</td>\n",
       "      <td>他们</td>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>一起</td>\n",
       "      <td>6.0</td>\n",
       "      <td>快乐</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>想</td>\n",
       "      <td>8</td>\n",
       "      <td>人民</td>\n",
       "      <td>13</td>\n",
       "      <td>那</td>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>共同</td>\n",
       "      <td>5.0</td>\n",
       "      <td>多</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>送</td>\n",
       "      <td>8</td>\n",
       "      <td>此时此刻</td>\n",
       "      <td>12</td>\n",
       "      <td>自己</td>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>就要</td>\n",
       "      <td>4.0</td>\n",
       "      <td>广</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>祝福</td>\n",
       "      <td>8</td>\n",
       "      <td>舞台</td>\n",
       "      <td>11</td>\n",
       "      <td>谁</td>\n",
       "      <td>4.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>都</td>\n",
       "      <td>4.0</td>\n",
       "      <td>长寿</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>唱</td>\n",
       "      <td>8</td>\n",
       "      <td>代表</td>\n",
       "      <td>10</td>\n",
       "      <td>什么</td>\n",
       "      <td>3.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>惟</td>\n",
       "      <td>4.0</td>\n",
       "      <td>著名</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    动词  动词计数     名词  名词计数  代词   代词计数   助词  助词计数  副词  副词计数 形容词  形容词计数\n",
       "0    要    23     中国    27  我们  100.0    着  16.0   不  22.0   好   23.0\n",
       "1    到    22      福    27   我   34.0    过  12.0   最  13.0   新   19.0\n",
       "2    请    20     朋友    22   这   32.0    之  11.0   更  12.0   大   15.0\n",
       "3    来    18     观众    18  大家   23.0    得  10.0   就  11.0   老   11.0\n",
       "4    看    14    航天员    17   你   16.0    地   5.0   还   9.0  幸运   11.0\n",
       "5    说    14  观众朋友们    17   您    9.0    等   1.0   正   9.0  伟大   10.0\n",
       "6   感谢    12      舟    14  此刻    8.0    连   1.0   将   8.0  欢乐    9.0\n",
       "7    带    11     全国    14   每    7.0  NaN   NaN   再   8.0  美好    6.0\n",
       "8    会    10      字    14  这里    7.0  NaN   NaN  正在   7.0   深    5.0\n",
       "9   过年     9      神    14  各族    7.0  NaN   NaN   又   7.0   小    4.0\n",
       "10   花     9     祖国    13  你们    6.0  NaN   NaN   越   6.0   高    4.0\n",
       "11   祝     8     航天    13  他们    6.0  NaN   NaN  一起   6.0  快乐    4.0\n",
       "12   想     8     人民    13   那    5.0  NaN   NaN  共同   5.0   多    4.0\n",
       "13   送     8   此时此刻    12  自己    5.0  NaN   NaN  就要   4.0   广    4.0\n",
       "14  祝福     8     舞台    11   谁    4.0  NaN   NaN   都   4.0  长寿    3.0\n",
       "15   唱     8     代表    10  什么    3.0  NaN   NaN   惟   4.0  著名    3.0"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "columns_slected=['动词','动词计数','名词','名词计数','代词','代词计数','助词','助词计数','副词','副词计数','形容词','形容词计数']\n",
    "\n",
    "df_Top6 = pd.DataFrame(columns=columns_slected)\n",
    "\n",
    "for i in range(0,12,2):\n",
    "    df_Top6[columns_slected[i]] = df_words.loc[df_words['词性']==columns_slected[i]]['词汇'].value_counts().reset_index()['index']\n",
    "    df_Top6[columns_slected[i+1]] = df_words.loc[df_words['词性']==columns_slected[i]]['词汇'].value_counts().reset_index()['词汇']\n",
    "\n",
    "df_Top6.head(16)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 41.8 词频统计"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>词汇</th>\n",
       "      <th>词性</th>\n",
       "      <th>年份</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>中国</td>\n",
       "      <td>名词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>中央电视台</td>\n",
       "      <td>None</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>中国</td>\n",
       "      <td>名词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>中央电视台</td>\n",
       "      <td>None</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>此刻</td>\n",
       "      <td>代词</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      词汇    词性    年份\n",
       "1     中国    名词  2017\n",
       "2  中央电视台  None  2017\n",
       "5     中国    名词  2017\n",
       "6  中央电视台  None  2017\n",
       "9     此刻    代词  2017"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_words.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>年份</th>\n",
       "      <th>2017</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>中国</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>中央电视台</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>中国</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>中央电视台</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>此刻</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "年份   2017\n",
       "1      中国\n",
       "2   中央电视台\n",
       "5      中国\n",
       "6   中央电视台\n",
       "9      此刻"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_AnnaulWords=df_words[[\"年份\",\"词汇\"]].pivot(columns=\"年份\", values=\"词汇\")\n",
    "df_AnnaulWords.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>年份</th>\n",
       "      <th>2017</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>中国</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>中央电视台</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>中国</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>中央电视台</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>此刻</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "年份   2017\n",
       "1      中国\n",
       "2   中央电视台\n",
       "5      中国\n",
       "6   中央电视台\n",
       "9      此刻"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_AnnaulWords.fillna(0,inplace=True)\n",
    "  \n",
    "df_AnnaulWords.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>2017</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>我</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>春</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>这</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>们</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>年</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>福</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>中国</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>来</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>大家</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>好</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>要</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>到</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>位</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>朋友</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>晚</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>不</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>个</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>请</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>上</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>号</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   2017\n",
       "1     我\n",
       "2     春\n",
       "3     这\n",
       "4     们\n",
       "5     年\n",
       "6     福\n",
       "7    中国\n",
       "8     来\n",
       "9    大家\n",
       "10    好\n",
       "11    要\n",
       "12    到\n",
       "13    位\n",
       "14   朋友\n",
       "15    晚\n",
       "16    不\n",
       "17    个\n",
       "18    请\n",
       "19    上\n",
       "20    号"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rank the 2017 vocabulary by descending frequency. The labels are read straight from the\n",
    "# value_counts() index instead of reset_index()['index']: pandas >= 2.0 no longer names that\n",
    "# column 'index', so the old lookup raises KeyError there. Works on older pandas as well.\n",
    "df_AnnualTopWords = pd.DataFrame({2017: df_AnnaulWords[2017].value_counts().index.tolist()})\n",
    "# NOTE(review): row 0 (the most frequent token) is skipped on display - presumably noise; confirm.\n",
    "df_AnnualTopWords[1:].head(20)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 41.10.关键词分析"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>2017</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>杨利伟</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>姜昆</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>@春晚</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>中国</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>世界</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>人心</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>观众朋友们</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>央视</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>传祺</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>央视网</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    2017\n",
       "0    杨利伟\n",
       "1     姜昆\n",
       "2    @春晚\n",
       "3     中国\n",
       "4     世界\n",
       "5     人心\n",
       "6  观众朋友们\n",
       "7     央视\n",
       "8     传祺\n",
       "9    央视网"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keywords for 2017: the ranked vocabulary is joined into one space-separated string\n",
    "# and passed to pynlpir.get_key_words; the result fills the 2017 column.\n",
    "df_annual_keywords = pd.DataFrame(columns=[2017])\n",
    "df_annual_keywords[2017] = pynlpir.get_key_words(\n",
    "    ' '.join(df_AnnualTopWords[2017].astype('str'))\n",
    ")\n",
    "df_annual_keywords.head(10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 41.11.生成词云"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from wordcloud import WordCloud,ImageColorGenerator\n",
    "from imageio import imread\n",
    "\n",
    "# Font file passed to WordCloud(font_path=...) when the cloud is configured below.\n",
    "# Absolute Windows path - adjust it when running on another system.\n",
    "font_wc= r'C:\\Windows\\Fonts\\msyhbd.ttc'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'中国 中央电视台 中国 中央电视台 此刻'"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Corpus for the word cloud: every entry of the 词汇 column of df_words, space-separated.\n",
    "myText = ' '.join(df_words['词汇'])\n",
    "myText[:20]  # preview the first 20 characters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Read the picture that is passed as the cloud's mask, then configure the generator.\n",
    "bg_pic = imread('CCTVSpringFestvialGala/host2.jpg')\n",
    "wc = WordCloud(\n",
    "    font_path=font_wc,\n",
    "    mask=bg_pic,\n",
    "    max_words=500,\n",
    "    max_font_size=200,\n",
    "    background_color='white',\n",
    "    colormap='Reds_r',\n",
    "    scale=15.5,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAMsAAAD7CAYAAADEgWCeAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJztnXmcFMXd/z8zs1xyLOyysCwsKzeygAoaQNQHFBFEgkciRtFgjBB8OB6PJ1GRqCFq9Ec0iDGCMRo1KiGP0SARCYrhEAniwb0iy7GyLMLusty429O/P3qrp7q6uqdmpmemZ/m+X69+dXVVd0/xot/7rarurg7oug6CIKITTHcFCCJTIFkIQhGShSAUIVkIQhGShSAUIVkIQhGShSAUIVkIQhGShSAUyUp3BWKAHjUgvCAQ74EUWQhCEZKFIBQhWQhCEZKFIBQhWQhCEZKFIBQhWQhCEZKFIBQhWQhCEZKFIBQhWQhCEZKFIBQhWQhCEZKFIBQhWQhCEZKFIBQhWQhCEZKFIBQhWQhCEZKFIBQhWQhCEZKFIBQhWQhCEZKFIBQhWQhCEZKFIBQhWQhCEZLFp3y35O84fEk/HL6kX7qrQtQTyKBPe2dMRRNB++hNhIbdmO5qNGTinhg8k2bRb5DUTRtr2c6atxjhNUsRHDoKAKCteQsAEBp6XcrrRlihyJJBHLliIACg1b82pLkmGQ1Flkzk0w6dAQAX7N9rptm2uI+YT6Qe6uCnkc6PzXYV4NMOnXHB/r3mPkycUxOG49SE4SmpIxGBZEkjex+YZYkcPJ926Iz2d9xuyWPSNH1tBZq+tiLp9SOskCxpxC2qXLB/Lw688KKjTHUzxiWrWtDm3mUu/PaZDsmSRmQiRGuWMbLmvuN5fT7OL8TH+YUIzXja83M3BGg0LI2wPolT9ODLxHSq0ebehdCMp3H4kn5ovWpTyn/fQ+IeDSNZfIResROB/G7QK3aaeYH8btjRuzt6bP8aALDr0ovQZeXHnv/20VEXouXS9Z6f14fQ0HGmUXNpfwBA9sqNyscwaXh5vIKJsqhNezPvh9UHPP2NTIf6LGkie+VGUxS9YqclmvDs6N0dgBFRkiUKoC4Jv9+ZBkUWnxDI72ZL6xU7bWIkQxQRXghRDl6kmY3b4NHvqpNeH79AsqQZfdcXCHQ5z5YGrAKlC16ORW3am/KciU00aoalmUBBT6D2tJHmRHmiWY65ZksqYBKIMixq096WV3zt91NSJ79AkSXdNDkr3TVwbHaJ/RgmDMvvOPD81FXSB9DQsQ/Q3n4WoWum2tapombPXmQXye/1NEDiHjqmZpjPsIhyrMpYxLTHMFH+0KIt/tCirXQfls/2cdqvIUPNMJ+gvf2sLW2JLi1S02eZcuwQANhkmHLs0BkpCA9FFp/AixG6Zqq5rT17b9J/mx88YEJMLN2GKccOmfIw+O3fn2HykCw+gY8mLJ0KUQDgFycjzTsmw8tdz4l63H8LIjV0SBYfwEcSGeF35if192WRRYwoYjlw5kUW6rP4ADGqWJpkU+ck/fdlkUUmjVh2pkUWGjr2AU5DxawZlgphziDoqeOGgDgKlmpJSgf0R/jkSXO7+7Yd0N77o7kdGv1TaO/90bI+k6A+SwqZ2zwXAKC9+EvLmhHoMQAAEN5ifV9FmzMV2pypljTb9pKun20EAgF037bDks+k4MU5EyFZUsiM45UAgNDtv7KsASOqBIsvQuiaqdB3fAYAqHtwgvUENZUI3fsskknWFVdjQ0GRRRgmCS9NoHP00bKGBvVZfIDYZ7FtO0SR0L3P4k+t8gAAPzlyMLmVbDjQa8V+QXt6hpkO3TXXzGNpt/1s53puFkJ3zk5CLWOn9s/PoO6ff0OzhStxcvylZn6zhSvTWKu4oGfD0sl7bQusGa1yjMUFXhDt6RkWecx9OFGOjR1sKav9x8I4aho/jX483RSj2cKV5nIm
QaNhHjD6ULllO3T7Q2ZajCravP9FaNr/i3rOY2MHo8XiTxzLAOD0C9Ypi9j+2pq3EppI/POORTh/3x5bmm2L8OUNGWqGxQmbGihV8PI4pb3myIqPAAA7J/wY3V77M1oNH9YQZKFmWCrhZ2rk+fqcHnGf89DiJWb62NjBZvQQm19O+7B05ZC+qBzSF0Bk1v1Y+bxjET7vWIRWw4dh54QfAwBaDR8GwBCDycHWVW+/gw0FdokaHLquZ8qScSxt20Ff2raDNF+W5jl69SDHbae0ruv6ocHF+qHBxbqu63rNiAGxV7qezwo6m2uWlpVnIHFfg9RniYO6O8cg67kl5prxfl6BdP/38wpw5cFySznLU4GPLrKIw9ItFn+C3LWbzfx4v+PCmlpif8UN7eGJZjr08MtmWj9QikD7rnHVw29Qn8VDeBl4OXgpnITi92H9EFkTTOyfJKPPwiSR9U9knL9vjylL6OGXoT080SKMz6Bnw1JJeLMxZBrseynCm1ci2PfSKEe4RxKWr5d8gkCvwWg+53cA7GKIlA807qK34PJYfyV37WbUPTgBwZHjESjqiUBRr6h1ZLBo0ntYVzT7i/Fpi7rp30fWM/9A3XTrjC5Zz/wDQEQSk+qDQJs8y2z/yZjMPJVQZIkTvcKY7C6Qb8wYKTa1WJpfy/Zj+wIRWcz1/sgjJ4EOxuCBvn+HmU432mOTAAChBxZY0tHRkcAf+EShO/ipxpSl3dlAMEsqhJhmyIRy/J16OXhxANiE0R69w3ZsaOYL0Fa8bqSH3wTtd/cAx4+YZdGOF88l7iuew+l8oZkv2LbTCDXDUsXpWy9Hk1c+MCOKiCiKWAbAFll43mtbYLnJyUvBpGF54gUuvaCH32Tdrr9wtUfvsF20Thc1yxN/z1FQ4bdkLL5oGKq3bMWtNd9Ky/0IyRIDp2+93Fw3eeUDS5lTJ57PFyWSCSM+DcBggogRRbwgZRKIkcBNGDd4eaIdx36D/02+LmM//kj5d/0CyRIDryzfhTvKSy15XxSejfPKdpvbKsPBVx4sx//l5CO7uA9qtmyNuv/bOfm4pqrC2KiqP3+OIVq05pOs3EkulcjhVuYWSWzHP3IbQg+9pLSvXyBZYkAmCr/mYQKxsvPKdlvEur6qAv+Xky/9ndLinui65Stz2xQFMCUBrH+9nS5SJxH4MvF8bsc69VdUJclk6HEXVSr3RRZFvig8G2f172+JPLxY11dV4HpehHqYKAfumm4r00uif53L6cKNNQqxppoI34xj+7k1y/Sv/hOtyhkByRIruR3NJC+BU/rExo34ovBsiyQsXXffjdKfKC3uidLinji+bCkA4OXsPHMd6HWhuR8fVdxGnFTlkUUap/M4RSyZNIGe37PlhR56Cdojt0nr5VeoGRYrlfsswgAROVhTKxps/6zfvCkt55tgADCx5qBlLSIbgXK6iKPtFw2nJp9Kp9+MRBkmCYNkSQCnPgvfN+GjDCvTZt9uyQvNehHa7NsRmvViJPNEjbE+Kxt1D05A1q9fM4vYtiwSuG3LRsWcyqXDxdm5judXbeKl+R5LQtBNSVVYX0WIKk6RxC3K9Jt0uWVbKks9H+R1xOUH95lrBnv8BADq7v0Bsub8zXLcu7kdzPTVlftt59X3lSDQrgj6t3sQ6Gg8CrO5SxcAQN9du7C5Sxf03bXLzGdpAJjfsi0mH5VPsCeWzW9pTMg3+eghW9n0YDaeCdeY6xRB77OkDK6DzyIIvwD2/otYDkAqhgyZKEDkmSyG9rjz1EgvZ+eZ/R5GoGMvoFFTUxQAFjnY+tjq1ZY8dvGLvNSxq1k2v2VbvPeDH1n25csYTJAUipIQJIsquR0jSz3s4l/QvjP2rVpjyQOA/ktfRXjLKoS3rAIAhLeswnlluxG4biIAQxgmzclPNkPbHnm8nkcURYWrK/eby8Sag9L+jrboKWiLnrLlM2n67tqFFhdfLD2/KMJt+6zD6qMXvR61jtpLj1jWfoeaYTHyTm4+
xlXah3u9RlsSeSAxNGaS676yZhh/DvF4p3xVZM0wlsc3u/gyHqcmXIqgZ8OSSnV9m79Jc2mxTCBZnr5rE8Jb17r+lNsFHO0iV5GGl1BbssA8F5/vRPidt9BowVLpxb7tRC0AuQhinvaUcf8odPcz5jZL+xmSRYU2HSybTAR9/XKEVy8285xQuRD5fWUyiBc5EJGGCSIThRFvFOFptGCpmT7y/vsomzIFxaVG8+t3mtHv2NLV/a3I4tLSjBBDBvVZEiBw4QhzmqNxlRVmJOHTgDcXajSBVMucmmSxUjZlCgC5HMWlpbbFVr/66MLWmQBFlhhwix58GUur9m1CYyYh/PlyBM8fgfDmVa77iRe3UyRy/b1RP4G29E8AgED3ATEdy+RgAmzp2hVbuna1CREtwrDokklRhmRRRBRA7JOMq6yQyqR98KqRaNoMoctvMfJc/poH+17iWg9eGFuUeONJhH708yj/EgChLNvxKpGLF0CUQSaMG3pZ5GnrQGEffH3bT9H9JX/P0k+yxIBMDpk8vDRMkGgEzx+hXA+nSMJEcZIxliZX3YxxCIwcY8mTRQ8nQYpLSy3lvFxMlEBhH3Pb76IANHScEuLtF8QCL5D4e04jXqExk+zzNKP+BbQj1dBWLbLt79S8kknhtJ9evgPQahEo7GOKs+F7o3DB/r2ux3oEDR0nFXE4tGVbI69lW6C6AmgjNL+qK6Ct/jsAIDR2SooqaSATM9Z+DZuRRYwsgDW6OEUWWR4vUaCgB/SyrZamWIpESQiSRZWzsiNprdZaVm3vyKdaklQh66swVKMLgzXDAODTDp19LwzJokqokbFmEYVHElksh0YZruXL781qjTl1h133cSLe5t7oQ+WWiTL4ub5EiktLMbd5rvkVMz4NAM3mPYeuY0YBABZdMQY//NcS6Xl49LKtvhcFIFnUkdyZ1p77BUJ3PgHoYSDA3bIS5TlaCZw+YaTbFrr+zJy6w0rViWfIWETsr7BtJo1MPvZdTLZm6RnHK808lm7U/CylevARxs/QTUkXtDeejGzw0aQ+uoTufMLY5kSpnXKV/UQtcw1JooiiXK/6i1hbsgDhz5bHfZ7Rh8ptUWX0oXLUzRjnGF34KMLSYh6Tp/b4CaV68H0XP0OyuGDes+BFadk20sHn2P2a8ZTtV52iT+Uqoi1ZYFmi7csTHGAfco4l4hzdth2A5OtlDogRJVqe+YXmFa+bE/7xcmSKKAA1w9SR9VU4zp5gTGZXPPM+W1k8fQmnu/Uqx6nkucHmJI613nyEAaz9GTbZn1621TJkzKf9DkUWF6TNsKOHHMVZ36GzZe1pXSQXbmjMJHzUrpNlOxbea1uA1ZdcZssDgPCa96XHzDheaWt28Z39xT+82fU3RTlYen7Lto4vlvkFiiwqOMgR3rIKweLI4ykX1o/oXBjjyE48HXV2zLBvv4m67yNN2uCh09W2/NGHynF023aLMKzPglAWApdfaTuG78TLtscu+gvK167DohFXmeVixJFFljS/46IEyeKCY58FkI6OPdCotZl+rPYwHmjUGo/VGqNbbnfYNxadjf57dpvrqPUS7siryPZIkzZ48K0nou4HuDfDxAt/+pfLEOg+0CzTv96ADnlZmP7lMqXfAgxh3N7r9wskiwossrD/TNbJd8FJFBlMEF4Up2aXjGjDyA+drsYjTdpIy1qe09s2v7K+fwfQRDLsq+tAIPrTIkweANC/dv76GB9d2l0Q3/cvUwnJEgtCVAkWX4KdfYyJurttNT4JwQQBIsKI311xI9o7KrG82chgzTDZMeG1RhQJDokMFTt+/4UTRf96gymFXvoFENYi24qC8HnffrrB99GFOvgqiFGEiyzdtu6QisJvM0HcRFEZNo5KVblj0btT75bmB4eMs4jihn7AeJyFFwUAAl3Ps+3rFF0ChX2kNyH9LAmDIksUDi7/EHkjjA5w+LNlCA4YaZbx2+HP5G304ICRcUsgiyKxNM8YrHMvHls5pC9y
1272/Kloi0jdB9oijWz4GPC/MBRZorD+RutQaPizZebCtgFDCiYOS7NtlQ64uE9cj7LkyG8sOvVXYvmt0JhJQKOmACICWCKGQySRNcmYKCzKZMp9FoosUbjqkH02x+CAkRZJAGtk4dN8JIqG40tdkhuU0Y4RiWU0zBGtzkzycjDET2UEug/E4Uv6mdutV22KlHFNsWlFQwAAz/p8sj2SRRGnZhZDlCaaJNEu8pPjI4/NNFu4UtokUxXFqRkWDfH8gTzrzVb94F5bHkMUB4BFnJZjhiJ03/MA/C8Jg96UjAFeBJkUKhHl+PeNv6LN/7HWkmYwSZotXGluszSPXl6CQEEv6OUlAIBAgTEN64ftOuKybyMzWGrLXnb894RGTnQsiwX2ifGCDdtsZYcv6YfWqzbZ1pEK1plzAqQImus4FfDRQ5RB7LtY+jWfvge9wn0yB33bOgBA00fmWOQQRWFyiGmeD9tFpphlQoRGTnSU4+2cfLzNfYXszkAr3BloBQA4OupCHB0V+SbMqSdn4dSTsyzHF2zYZorCjmXHMzH4dUmPbpGDUytKQlBkSQb1N++YVNpHke+wnHpqrmNU8YJYIsuKm2diRP08yvx3K9mF/px+xBSl5VLrF8fYPrHynH7ElCWU3RpazWH02rEzrnPFCb2Dn1JO1BivGZ/g2tr8a8f1N+9kTbHmL/3Nsjap/0gSL1ashIYZXxLjhXFvas00U9dIPtcHGJLwkeX0/KfQZLL8no0qKZbDM0iWeGBi8IKowmbhF77zYtuOEyZJeLsRsYK9h9i2w9vXIth7iBlVosFHFVGU5/QjSufgIxH7RHrWjZMRuuoGpeP9ADXD4oF/RozDi6jAzsG2o8H/puoxjESaUnxTLZbzqMqVRKgZljL4u8xRXghLlHjlq500yrLNT+jtFbFe9Pz+dXdfh6yn3vK6SkmHZImVYBYQrr85x88f5gNqJ41CowVL0WjBUpswMm5tH2lGDq4oA2CNNolEAf5YfnTMB5ElbkiWWGneGid/9F8AgGZv/BsnJ11vphmsOaQf3INAXpGtqbQ8ryMuL9+D8JpIJ1/2LgvfrHJqnvHnZhGEScOE0ebeZRw742npP4mJInJnoFXk4q78BsjtZCvn4ZtkB78uxUM9zrOVZzIkSxw0e+PfVmHq0yKBvCJpvqxjLXvpS9YMU+mjMGncml+f5Bda0k7CmAiiMJgAfPQA4CpKJjbBALopGTd8JPEb2oKZjnkP9Tae6Xr5QA0GV5SZCyDv8PN54e1rzVE1vtyp6ZbpkUSERsM8xPPRMG7Ujc+XpS3nkMgCAKFJjzr+vuyCj9Z/EctlsokjZ0DaO/j0uEs64L81nxQUXl9WgZckvHUNwiXrLOUqkcFJBFWx+H0ytRlGfZYEuLrS/vg+4N4Jl3XanYjWZ3E6nsmhLZiJ0K3WKBPsM9RMyzrojOPfH2J7FCfavRWZOOL+lkGDDINkSSOx3kSM+fz10kwPZuOZKI/BixcwE0VFgGj7sAiUyaIA1GdJGmw2fMAeWfSdnwEAAt0GWNL8vvHcwd922/1muu+uXUrHx3IBR9uXlyUQDOL3mtok5ymG7uD7hSPlFfhV594ArMLIEEXhSWSwQFUUwPjrL75o5rYvj75/BwIdehhTJwH4feV2BLhXm09NGI6mr62wnWdV+0645ED0yQH9BsniMa0K8qN+NuL4tJ+Z6ebznvfst50kCW9cgWD/4TGdi38Jjb2AxtZMEoZs6qRTE4Zb1oymr63ISFEAksUz6u7/EbIefwMAoP19HkLXTnPct8U//2PL0/duQqBzv6T0Y2SiyKKJW4RhZfo32xHoZEROURLxeBZZvurZ3czvye2vPfMAQtMfi/Ffkz6oz+IR2sMTEXr4ZXObf/OQcU1VhWO+DPYovZjHI5ZHQy/5BID7HGaxoD1vfDUg9LPf2MpY/WX/jjRCfZZ0w4sCRMTg13wZQyaPkyQsj12APNpff4vQDffEVGd2odv+LT/7
jbRMJoQbfH0bAiSLh2jv/RGh0T8FEJGAXztFECDy8KMMUR7+L3assGlk48VJMKd8IHbJ/ArJkiTEiMJHEFk0cXvoUWzKyESJJaqITTB2MYsXvFP+mQrJ4iEsqojI5IiGU9TgRfGqeRNLtOCjhKy/srp9J1zMjXZ93rEI51NkIURW1E9BNPxb6yP4YvMrWp8FgK3ZxadlTTJVvO7gA1aphjwyocFGInqQ0gNWcPN08aLwUlRd1BdVF/XFNVUVqLqor5nH9pkezMb0oPHm4v/0ibzlOD2Yjf/pM8oUQi/53CzT/jwb+rrl0P48W7muMklCP/uNtF/hlH+mQpHFA5ggYkThyfl4sy3N5/HPbjmlASA07k7buUM/nmXLSzZO/Rux/Px9e1JWp2RD91k8pu7BCcj69WuO2yzPidBt9yHQra/rfux82tMzELprbkz14z+o5NhcatIMOH3SXjeH/orYd3G79+ID6D6LH2AXNxOE3wZgk0YFN9FiFQVwboYBnDz1ojiNhsmiivb8fVL5vvup8RHXxn+Uf/04kyBZkoiTHNEiD49TdKm79wfImvM3aZkbXj4m7xY5WFlDkIRBsniMLKKwfDGPIeaxfd0kikcUwP7kcKxNJacml9N+8fyGXyFZPEZ24bv1UVTPIxJPk84J8cLWnr/P9ZEXlu/WyW+Iw8ckS4qxNcFmzkfdo5OlF78YjXjxTt96OZq88oEndRL/8rNtp4gQrTxaWaZC91lSCB8tzHSz5rYyFRIVRXt3PtCpi7FW4MvOZ0vTFiqNO/ds4u+GBsniMW59E5Yvjo7xx4hRhG0HB42IqznnROjqyeYCANpbc82FbfNrBhPliWY5+PeDj1hPWj8Rn1cRz2/QfRYPkXXo+XxeClmzi893G26ONoIWD7wUoetmWPLYtgx9X/1n+jr28rQ+SSTu+ywkixccrZTnt8x131csP1oJtMzFVz27o+dXX1uK9HLjY6aBgp5IBtpbcxG6boYtkgARWfSDe4065HW2fXyVlYk4faA1jdAke76Cl6DuO2PtJJTkOFEUwJAkWaI4IUYU2deKeUkCeZ3Nffh0Q4Fk8QpekKOVQLP6aYGyGlvz2X5CVNEenyw97dZuXc01W5KNU7OLieH2Se+GDMniBbwAbMlqhNrJoyP71H0X2Y9Fm3rcROmzs9Rc99np/sXjeHHq1ItNMj5quOU1VEgWD9B3fWGsd6y3ldVOHm1I04abF5lLM1FC98830pXyV49ZVPFSmCNXGDPqs0girsU03+SS9VHEPKd+TKZCNyU9INDF+BZJoMeFlvxG899zPU4UxRSmPq/syCn0AZIWUdxgnX3W8Wewzr2YZjQ0QXhoNCxJODWtGKH7rTcDmSximvF+XmSmxysPlntUy/rfXrkQoUvHR7YFQUT0Q2UItDU+hnT0t4+h5T0PeFqfJEOP6KcTfdcXZnRhiBc7Y0vX+g76C11RXCqPGLJjS08a/Zwpxw7Zyjzh5DGgWQtpkXjH/ty9u/Fl57Nx7t7dUUUJf7YMwQEjvaplWqE+iwfwotRNG4u6aWMt5SyvbtpY9BpTjF5jih1F0V75NbRXfg1odca6Pm/KsUPJEwWAtn6JY9m5e3fj3L27zTSjrroaAFCxeg0A4JXsdpa1/tWGBiMKQLJ4hr5+BeqmjUXWvMUAEBGm1ogILD9r3mIzDQBvtG5nP1mjJkAoEvRDtz6YpFpH4JthTvCinLt3N7LatAEALBtzrUWUW2u+BQCEt8Y+r5mfoT6LRzBRLML88idA9UHp/rwwTmh/fQqhG+4GACzLK8BIj/sq5u+sXAggIgzfZ5F12MVhYiYIL4qPocdd0o3Y9AJgiTKiSDxv1U+HdF1VBXDkINAqz1zv7d8LnTeWJLXubh18t0dcgEiTS8TH0pAsfoKJwwSRoRJZACRdFr1sKwKFfSx5Fll2rDeHxPUd64HW7aMK42NRABoN8x/Hv9wDXNofAJC9ciMAOEYWN/Lm/cHzuvGIovAw
UfibrU536vlmWIY0x2KGIkuK2HR2FwBAv932Dw6tyy+UHjOo/vv0qUD78C8IXXYzdvU1Htbsstl4ypm9yCW+o7I8ryNGHDTmSVt65dUY9f67ZtmJD5fjrMtGYE+/XijalNwmZBxQZPErdT+/AVlP/lUqCQ8vxuYuXXD8ZB0A4MiLC9Dq9klJq9/doWwz/ZR2s5lm0gARcXiYKDwsojBRGho0dOwhdT+/wZJm23xamzMV2pyprufpu2uXKc/hub9NUm2tPKXVYFffnqYYXTZ/ZS6PN8ux7b88r6NlW2x6FW0qQdGmEkwNZouHZizUDPMQXpasJ/9qRhW2dkPWFBtUUZaS0bB44JthshExH/dZqBnmF5gUvDjRcOqzAEi5KNqyl6X5oZETLdt8M6zd4O9h1PvvQv92L/TSLxEcHBkBfKFlHu44Kr/XlGlQMyxJRIskAFA9tB8Aa39lUEWZuZwcf6nlo6Z+J9Cus00Ufp3pkCwe4iSIoziNGls2mTQs0rh9k95LTvzgYpy4Kf7pi/iRMJ47jh40l4YA9VlSRHjTRwj2GyYtY3IMqijDuvxCy9rvbO/eDb2/3hnJOFYFtMixp/0D3cFPF+s7RG7SXbh/Lx5vloP7T1bFfB5eEkaqZFnYpr0tb3z1ATN/fPUBtROJogBAixxof5+H0LXTvKiqF9DsLumi/8crceF++8OGp265TPkcoiCFD/zC3E52n2Vhm/YYX33AssQFE+VYlUUUAAhdOw2nJgz3qMbpgyJLgmhvzkHoxnvddzp8AGht/+vNkDXDUkXt0aNo1LKlKU3cyJpc1AxLG76vaN2sW5A1+1UA1sjS9NUPXY9zGjru/1/GIzIp6ejXnjbeo5GwfchF6LrwTTTubH8urHpoP7RZsykSTRj+k4RBzbB0UDfrFnMRafrqh1ElEWERha1TNRoGwFEUAOi99mOpKAAMUYCIHC1y7M2xBgLdlEwQFkmYMHx0UUW8z8LW2pN3ovHAvh7VVI3wBuuXuoIDjc/c6VX7EcjpIDskAh9NmDANCGqGJQAfUWIVhMHmApvXvC2mHT9krlNK/acikNvJJguDSSPi2AxjtMjBO7n5GOcwH1oaoMdd0oUYWVjeqVsuQ9NXPzTXTrA5wZggvCitcVNzAAAGs0lEQVTak8ZnvEM/f87zenuF2QwDrJGlgUUVgGRJGCZJ1uxXbX0X1eHjtL/PUv9dlYRx6OT7KKokBHXwEyBr9qvmwucxYu3gy2DRJemwppgDc5tLPp/hRn2f5Z3c/AQq5S9IFg8RowuLLJ/86vGox7KHJ1kaSKEoAnz/JDjwSgQHXokZx+WfzGAPg5rwo2FB4/JqKMJQMywBZEPGPKzPMviX9yudL63PhUmaYrw0c5vnSoWx9Vn4pthZrT2tYrqh0TAP4fsvsSD2WdL5IGV4w/sIDrzScQjZEdndeq0WCDXyuIYJQ6Nh6cAtssQ6XCo+cZxOZKLMamzMPjn7u2r5MXu3Gfv2GYrw1jUI9hnqR1ESgmRJAFkEYQKpiBJe8iL0/bulkWRQRRm0159A6KZfOJ8gSbAowqRhaydRAEMSWbohQc2wJBFrZOEfpmTb6WqGyXhoyI0A3IUBYH36GABa5KBm+HnIXvGFl9VMBHo2LJPhm13r8gvT3gwDItGFrZUkkaUbEBRZPOC7264AADR+6V+onXk7Gj36YkLnM6NKTf0j89nOj/f7Bj6iCJHFZ1BkSRe8KACglxsvgsVyb4FFEz6qrMsv9JUksxq3MTv5jPDGFZa1iRBZaoZbP/SUqVAHP0nEOhImsi6/MBJZ0sSa9oUYesBatz+1ysNPjhgTUAT7D7esTRrgE8cANcM8gUUXIBJhjo0bihbvrFE+h9Mke6mGF2RN+0idllYfc++3iM2vemrGGk8xNIQOPkWWBPnutivQ+KV/2daxiMKwyVFzIKVNscW51vdV+KiyVGiC2RDfZQEaXHQhWTyARRY+wsQyM4qln8IxqOTTlAsztnK/
mebliToaxhA69y3umeJl9dKLruuZsviW0xNH2PLebN0upnN80r6TdK0frkiscqniaGVkzZZ66hY/n6ZKSYn7GqTRMA/hI0s8M6XwNyLZOrz1E28qFyMbCoriO1AyVBy6enKCtfEH1AxLENZHAWDps8SCeFOSke4ZKUVhBpbvcT9AfJjSn1MhxQ1FFg+QjYYBwHerP1I+hxhRBlWUIbz2HW8qGCcDy/dYFgDAcZe+izga1oBEAWjo2BPiiSY8aX+tWAKLKlGjCcNp6tZ35wPwVVOM7uCnk+DQSGT5qF0ny1oFPpqIESadDCzfgw0FRZbFEdnQMQxJAvldkljL1EGRJQFeb90ONx325gtXfnnqOCrHq4Hm8nsuO/v0QLetO7CzTw8zr9vWHamqmSo0fWsmo9IM056egdBdc1NVJccoMnDHF1JZeEEAmNI0JFmoGZYAJT26mcv+u++yFh4/rHwe/mtffDOsdspVAIDaKVdZRHk7J40TQDhEFcAQhMkhytMQoKHjBOm1I/Ihn5Ie3SLbzdUna5BFFrdm2DVVqZmHi+/cbygocu3s8xHEh9HEEyiyeEBJj25mWo/jQhYjC5OkdspVZnTRnp5hOUb8tLbXiGKwzr4qBwcVAwCqLkrtXM3JhCJLnDBBeFF67dgJ7aM3ERp2Y8znk0WXRn/4pykL3wxLpiir23fCxQe+kYqhOozMRGlokCwJwppdliZYHMQy8sV/VpuJw+clwsUHIjNTsmiiIsk3D8xE5Wt/QY/ZMz2phx+h0bAEEAUp6d0TvbZ/lcYaAeHtaxHsPSTu49lEG+xNz3GVFai87kpo35Sh3X+2Kp2DRZa8dVtQdVFf5Hy8Oe76JAF6nyXVyJphqaDs3N7otGEzAlnW/7rygeegYMO2hEWR5Y2rrMCpO65WOsfBQcXI/ee/EcxtCwB+EyUhqIOfACyq9Nqx01z0fSVJ/c3CL7fbRAGAgg3bPDk/ex2arYtuudn4bMYL8m/dR6PunutRd8/1ntQt3VAzLE5YE4xFFiaOvq8EgY690lk1JVgkKh94jiWfSacf2AUACLSP7VEVvnOft25LgrVMCtQMSwfaYfUbj5kAH51ilYR9kblkd42ZVVI/stZQoMgSJ3xkSWQUrKGxur3xAOnFB74x02zbJ1BkSQdO91qIhglFljg4Of5Sy2e3T46/1Eyn9HPcfoR/Kvl4NXBWNhDw1TgSPXVMEIrQU8cEkWxIFoJQhGQhCEVIFoJQhGQhCEVIFoJQhGQhCEVIFoJQhGQhCEVIFoJQhGQhCEUy6anjuJ/pIQgvoMhCEIqQLAShCMlCEIqQLAShCMlCEIqQLAShCMlCEIqQLAShCMlCEIqQLAShCMlCEIqQLAShCMlCEIqQLAShCMlCEIqQLAShCMlCEIqQLAShCMlCEIqQLAShCMlCEIqQLAShyP8HhId4ZmvVjTMAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x24de272a780>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# generate() returns the WordCloud itself, so the laid-out cloud can be shown directly.\n",
    "plt.imshow(wc.generate(myText))\n",
    "plt.axis('off')  # hide the axes around the image"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<wordcloud.wordcloud.WordCloud at 0x24de27a42b0>"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "# Create the output folder if it is missing, so saving also works on a fresh checkout\n",
    "# (writing into a non-existent directory raises FileNotFoundError).\n",
    "os.makedirs('CCTVoutput', exist_ok=True)\n",
    "wc.to_file('CCTVoutput/chun.jpg')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Close the pynlpir session; this is the last use of pynlpir in the notebook.\n",
    "pynlpir.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  },
  "toc": {
   "nav_menu": {},
   "number_sections": false,
   "sideBar": true,
   "skip_h1_title": false,
   "toc_cell": false,
   "toc_position": {
    "height": "calc(100% - 180px)",
    "left": "10px",
    "top": "150px",
    "width": "165px"
   },
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
